######################Simulations for posterior predictive p-values#############
#############################data generating functions##########################

######################The regular DGP(without extreme ps)#######################
Regular_dta_generator <- function(params)
{
########################load variables from params##############################
  beta.reg0 = params$beta.reg0
  beta.reg1 = params$beta.reg1
  beta.logis = params$beta.logis
###################use cal.ps.sum() to calculate \sum pi_i-n_1##################
  cal.ps.sum <- function(theta,x_beta){
    return(sum(plogis(x_beta+theta))-params$N.1)
  }
  
###############################generate covariates##############################
  x <- matrix(data = NA,nrow = N,ncol = 4*K)
  beta.logis <- rep(beta.logis,K)
  beta.reg1 <- rep(beta.reg1,K)
  beta.reg0 <- rep(beta.reg0,K)
  xf.1 <- matrix(rep(1,N),ncol = 1)
  colnames(xf.1) <- c("V1")
  for (i in 1:K) {
    z <- matrix(c(rbinom(N,1,0.5),z2=runif(N,0,2),z3=rexp(N,1),z4=rchisq(N,4)),ncol = 4)
    x[,4*i-3] <- z[,1]
    x[,4*i-2] <- z[,2]+0.3*x[,4*i-3]
    x[,4*i-1] <- z[,3]+0.2*(x[,4*i-3]*x[,4*i-2]-x[,4*i-2])
    x[,4*i] <- z[,4]+0.1*(x[,4*i-3]+x[,4*i-1]+x[,4*i-1]*x[,4*i-2])
    xf.1 <- cbind(xf.1,z[,2],z[,3])
  }
  xt.1 <- cbind(rep(1,N),x)
  x_c <- t(apply(x, 1, FUN = function(a){a-rep(c(0.5,1.15,0.9,4.24),K)}))
  if(is.na(params$theta)){
##################### calculate theta such that \sum pi_i=n_1###################
    params$theta <<- uniroot(cal.ps.sum,c(-2,0),extendInt = "yes",x_beta = x%*%beta.logis)$root
    params$theta <- uniroot(cal.ps.sum,c(-2,0),extendInt = "yes",x_beta = x%*%beta.logis)$root
  }
  if(params$extr == T){
    x[,2] <- exp(x[,2])
    x[,3] <- exp(x[,3]/3)
    x[,4] <- exp(x[,4]/3.5)
  }
  ps <- plogis(x%*%beta.logis+params$theta)
############## and sampling treatment vector by the propensity scores###########
  t <- ifelse(runif(N)<ps,1,0)
  
########## generating potential outcomes y.0 y.1 and observed outcomes y #######
#### using is.indiv to indicate whether the null hypothesis is 0 individual treatment effect
  ep.0 <- rnorm(N,sd = params$sd0)
  y.0 <- params$sign*(x_c%*%beta.reg0 +params$mu.0 + ep.0)
  if(params$is.indiv)
    y.1 <- y.0+params$tau
  else{
    ep.1 <- rnorm(N,sd = params$sd1)
    y.1 <- params$sign*(x_c%*%beta.reg1 + params$mu.0 + params$tau + ep.1)
  }
  y <- ifelse(t==1,y.1,y.0)
  
  return(list(ps=ps,t.t=t,t.f=t,y=y,xat=cbind(rep(1,N),x),xaf=xf.1,x1t=cbind(xt.1),x1f=xf.1,x0t=cbind(xt.1),x0f=xf.1,y.1 = y.1,y.0 = y.0))
}
#########################The extreme DGP(with extreme ps)#######################
Extreme_dta_generator <- function(params)
{
  x       = matrix(rnorm(N*2), N, 2)
  x.1     = cbind(1, x)
  x1      = cbind(1, x, exp(x))
  xt      = cbind(1, exp(x))
  beta.z  = c(-1, 0, 0, 1, -1)
  pscore  = 1/(1 + exp(- as.vector(x1%*%beta.z)))
  z       = rbinom(N, 1, pscore)
  beta.y1 = c(1 - 0.2 * exp(1/2), 0, 0, 0.2, -0.1)
  beta.y0 = c(1, 0, 0, -0.2, 0.1)
  y1      = params$sign*rnorm(N, x1%*%beta.y1)
  y0      = params$sign*rnorm(N, x1%*%beta.y0)
  if(params$is.indiv == T)
    y = y0
  else
    y = z*y1 + (1 - z)*y0
  return(list(ps=pscore,t.t=z,t.f=z,y=y,xat=xt,xaf=x.1,x1t=xt,x1f=x.1,x0t=xt,x0f=x.1,logic.t.now=as.logical(z),y.1 = y1,y.0 = y0))
}
############################A real dataset, BMI data############################
BMI_dta_generator <- function(params)
{
  library(ATE)
  data(nhanes_bmi)
  t = nhanes_bmi$School_meal
  y = nhanes_bmi$BMI
  x = as.matrix(nhanes_bmi[, -c(1, 2)])
  x <- cbind(1,x)
  N <<- length(y)
  return(list(ps=NA,t.t=t,t.f=t,y=y,xat=x,xaf=x,x1t=x,x1f=x,x0t=x,x0f=x,logic.t.now=as.logical(t)))
}
#################################A real dataset, cps data#######################
CPS_dta_generator <- function(params)
{
  cps1re74 <- read_table("cps1re74.csv")
  dta <- cps1re74
  t <- dta$`"treat"`
  logic.t.now <- as.logical(t)
  y <- dta$`"re78"`
  dta <- dta[,2:9]
  colnames(dta) <- c('age','schooling','black','hispan','married','HSdrop','re74','re75')
  dta <- cbind(dta,ue74=as.numeric(dta[,7]==0),ue75=as.numeric(dta[,8]==0))
  if(params$is.interaction){
    for(i in 1:10){
      if(i > 2)
        j = i-1
      else
        j = i
      dta <- cbind(dta,dta[,i]*dta[,1:j])
    }
    dta <- dta[,c(-18,-24,-40,-47,-56)]
  }
  if(params$match.num > 0){
    match.rslt <- Match(y,t,as.matrix(dta),replace = F,ties = F, M = params$match.num)
    index.treated <- unique(match.rslt$index.treated)
    index.control <- unique(match.rslt$index.control)
    indices <- c(index.treated,index.control)
  }
  else
    indices <- 1:nrow(dta)
  x <- as.matrix(dta[indices,])
  t <- t[indices]
  y <- y[indices]
  N <<- length(y)
  return(list(ps=NA,t.t=t,t.f=t,y=y,xat=x,xaf=x,x1t=x,x1f=x,x0t=x,x0f=x,logic.t.now=as.logical(t)))
}


#################################THE MAIN FUNCTION##############################
##########Use the value of params$data to determine which DGP to use############
dta_generator <- function(params)
{
  data.use <- switch(params$data,
                     "extreme" = Extreme_dta_generator,
                     "bmi" = BMI_dta_generator,
                     "cps" = CPS_dta_generator,
                     "regular" = Regular_dta_generator)
  return(data.use(params))
}
